Preliminares
library(tidyverse)
library(reshape2)
library(plotly)
library(kableExtra)
library(ggrepel)
# Load both datasets.
# TODO (from the original author): add an explanation of what each one contains.
BCG_strain <- read_csv(
  file = "task_2-BCG_strain_per_country-1Nov2020.csv"
)
COVID_noformat <- read_csv(
  file = "task_2-COVID-19-death_cases_per_country_after_fifth_death-till_22_September_2020.csv"
)
# Clean the BCG data.
# Drop the columns that contain nothing but NA: a column is kept as soon as it
# holds at least one non-missing value. (Testing with `all(!is.na(col))`
# instead would also discard every column that merely contains one NA.)
BCG_strain <- BCG_strain[, colSums(!is.na(BCG_strain)) > 0]
# For now we only care whether a BCG vaccine was given in each period, not
# which strain was used. Every column except the first is recoded as:
#   0  - no vaccine was given (value stored as "None")
#   1  - a vaccine was given (any other non-missing value)
#   NA - unknown (value contains "Unknown", or was already missing)
BCG_strain_no_strain <- BCG_strain
# Recode column by column with vectorized operations; lapply() always returns
# one integer vector per column, so the result does not depend on how the
# columns happen to simplify and empty columns are handled without error.
BCG_strain_no_strain[-1] <- lapply(BCG_strain_no_strain[-1], function(x) {
  x <- as.character(x)
  is_unknown <- is.na(x) | grepl("Unknown", x, fixed = TRUE)
  # A value counts as "no vaccine" when it is exactly "0" once "None" has
  # been substituted by "0" (i.e. it was "None" or already "0").
  is_none <- gsub("None", "0", x, fixed = TRUE) == "0"
  as.integer(ifelse(is_unknown, NA_integer_, ifelse(is_none, 0L, 1L)))
})
# Clean the COVID data.
# Drop the columns that contain nothing but NA: a column is kept as soon as it
# holds at least one non-missing value. (Testing with `all(!is.na(col))`
# instead would also discard every column that merely contains one NA.)
COVID_noNA <- COVID_noformat[, colSums(!is.na(COVID_noformat)) > 0]
# In this dataset missing values are spelled "NULL"; turn them into NA.
# gsub() with an NA replacement sets every element matching the pattern to NA.
COVID_Na <- sapply(COVID_noNA, function(column) gsub("NULL", NA, column))
# The step above yields character data (expected to be a character matrix,
# one column per variable); convert it back into a data frame.
COVID_Na_df <- as.data.frame(COVID_Na)
# Store both date columns as Date, parsing them as day/month/two-digit year.
COVID_Na_df[["date_first_death"]] <- as.Date(
  COVID_Na_df[["date_first_death"]],
  format = "%d/%m/%y"
)
COVID_Na_df[["date_fifth_death"]] <- as.Date(
  COVID_Na_df[["date_fifth_death"]],
  format = "%d/%m/%y"
)
# Store every column after the first four as numeric (double).
COVID_Na_df[, -(1:4)] <- sapply(COVID_Na_df[, -(1:4)], as.numeric)
# Finally, merge both tables into a single one, matching rows on the country
# name and keeping every row of the BCG table.
COVID_BGC <- BCG_strain_no_strain %>%
  left_join(COVID_Na_df, by = "country_name")
# Shorten the column names, which are very long. The replacements are applied
# one after another, in the order listed.
colnames(COVID_BGC) <- str_replace_all(
  colnames(COVID_BGC),
  c(
    "mandatory_bcg_strain_" = "strain",
    "deaths_per_million" = "dpm",
    "days_after_fifth_death" = "d",
    "stringency_index" = "si"
  )
)
# Render missing values as empty cells in kable() output. options() returns
# the previous settings, which are kept in `opts` so that they can be restored
# later with options(opts).
opts <- options(knitr.kable.NA = "")
# Print the first rows of the merged table in slices of at most ten columns,
# repeating the first column in every slice. The slice boundaries are derived
# from the actual number of columns rather than from a fixed width, so the
# loop neither indexes past the last column nor leaves trailing columns out.
table_width <- 10
n_cols <- ncol(COVID_BGC)
slice_starts <- if (n_cols > 1) {
  seq(2, n_cols, by = table_width)
} else {
  integer(0)
}
for (i in slice_starts) {
  slice_cols <- i:min(i + table_width - 1, n_cols)
  print(kable(
    head(COVID_BGC[, c(1, slice_cols)]),
    digits = 2,
    caption = "Tabla 1. Vacunación de BCG por países y muertes por COVID-19",
    format = "simple"
  ))
}
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
1 |
1 |
1 |
1 |
1 |
1 |
0 |
0 |
0 |
AFG |
| Albania |
|
|
|
|
|
|
|
0 |
0 |
ALB |
| Algeria |
1 |
|
|
|
|
|
|
0 |
0 |
DZA |
| Angola |
1 |
1 |
1 |
1 |
1 |
1 |
0 |
0 |
0 |
AGO |
| Argentina |
0 |
0 |
1 |
1 |
1 |
0 |
0 |
0 |
0 |
ARG |
| Armenia |
1 |
1 |
1 |
1 |
0 |
0 |
0 |
0 |
0 |
ARM |
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
2020-03-24 |
2020-04-05 |
0.59 |
0.85 |
1.10 |
1.54 |
2.31 |
2.95 |
3.49 |
4.57 |
| Albania |
2020-03-12 |
2020-03-25 |
5.91 |
7.64 |
7.99 |
9.04 |
9.38 |
10.43 |
10.77 |
10.77 |
| Algeria |
2020-03-13 |
2020-03-18 |
0.59 |
1.32 |
3.94 |
6.27 |
7.94 |
8.94 |
9.69 |
10.33 |
| Angola |
2020-03-30 |
2020-06-12 |
0.27 |
0.30 |
0.46 |
0.58 |
0.76 |
0.85 |
0.88 |
1.22 |
| Argentina |
2020-03-08 |
2020-03-25 |
0.82 |
1.44 |
2.17 |
2.92 |
3.65 |
4.58 |
5.44 |
6.48 |
| Armenia |
2020-03-27 |
2020-04-03 |
4.72 |
6.75 |
8.10 |
10.12 |
11.81 |
14.51 |
16.20 |
20.59 |
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
5.60 |
6.32 |
7.55 |
9.48 |
11.59 |
14.03 |
15.88 |
18.52 |
21.04 |
24.07 |
| Albania |
10.77 |
10.77 |
10.77 |
11.47 |
11.47 |
11.81 |
12.51 |
13.20 |
15.29 |
18.42 |
| Algeria |
10.86 |
11.56 |
12.36 |
13.11 |
14.07 |
14.89 |
15.73 |
16.69 |
17.72 |
19.09 |
| Angola |
1.55 |
1.79 |
2.28 |
2.62 |
2.80 |
3.10 |
3.26 |
3.44 |
3.77 |
4.08 |
| Argentina |
7.28 |
8.45 |
9.85 |
11.24 |
12.59 |
14.69 |
17.37 |
20.20 |
23.08 |
26.71 |
| Armenia |
25.98 |
38.13 |
53.32 |
67.49 |
87.07 |
101.92 |
121.49 |
141.74 |
154.90 |
169.75 |
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
26.00 |
29.90 |
31.11 |
32.65 |
32.98 |
33.58 |
34.78 |
35.32 |
35.63 |
35.99 |
| Albania |
23.98 |
28.15 |
32.32 |
37.18 |
41.70 |
50.04 |
55.95 |
65.33 |
71.24 |
79.23 |
| Algeria |
20.02 |
20.80 |
21.71 |
22.71 |
23.72 |
24.79 |
26.13 |
27.36 |
28.46 |
29.69 |
| Angola |
4.38 |
|
|
|
|
|
|
|
|
|
| Argentina |
30.64 |
35.45 |
40.23 |
47.20 |
55.45 |
65.40 |
78.72 |
91.49 |
105.87 |
125.17 |
| Armenia |
190.67 |
209.23 |
228.80 |
239.94 |
252.76 |
259.85 |
268.62 |
275.71 |
282.12 |
289.55 |
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
36.12 |
84.26 |
84.26 |
84.26 |
84.26 |
84.26 |
84.26 |
84.26 |
84.26 |
84.26 |
| Albania |
83.40 |
84.26 |
84.26 |
84.26 |
89.81 |
89.81 |
89.81 |
89.81 |
89.81 |
89.81 |
| Algeria |
30.81 |
75.00 |
86.57 |
86.57 |
86.57 |
92.13 |
92.13 |
76.85 |
76.85 |
76.85 |
| Angola |
|
75.93 |
75.93 |
75.93 |
75.93 |
76.39 |
76.39 |
76.39 |
76.39 |
79.17 |
| Argentina |
145.30 |
100.00 |
100.00 |
100.00 |
100.00 |
100.00 |
88.89 |
88.89 |
88.89 |
90.74 |
| Armenia |
295.96 |
|
|
|
|
|
|
|
|
|
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
84.26 |
84.26 |
78.70 |
78.70 |
78.70 |
78.70 |
78.70 |
78.70 |
78.70 |
78.70 |
| Albania |
83.33 |
83.33 |
86.11 |
67.59 |
67.59 |
67.59 |
67.59 |
64.81 |
64.81 |
64.81 |
| Algeria |
87.96 |
76.85 |
76.85 |
76.85 |
76.85 |
76.85 |
76.85 |
65.74 |
65.74 |
65.74 |
| Angola |
79.17 |
79.17 |
79.17 |
76.39 |
76.39 |
76.39 |
81.02 |
81.02 |
81.02 |
|
| Argentina |
90.74 |
90.74 |
90.74 |
90.74 |
88.89 |
88.89 |
88.89 |
88.89 |
88.89 |
92.59 |
| Armenia |
|
|
|
|
|
|
|
|
|
|
Tabla 1. Vacunación de BCG por países y muertes por COVID-19
| Afghanistan |
78.70 |
78.70 |
78.70 |
78.70 |
71.30 |
71.30 |
48.15 |
48.15 |
21.30 |
21.30 |
| Albania |
59.26 |
59.26 |
59.26 |
59.26 |
59.26 |
59.26 |
53.70 |
53.70 |
53.70 |
53.70 |
| Algeria |
65.74 |
65.74 |
74.07 |
74.07 |
74.07 |
74.07 |
79.63 |
79.63 |
85.19 |
68.52 |
| Angola |
|
|
|
|
|
|
|
|
|
|
| Argentina |
92.59 |
92.59 |
92.59 |
90.74 |
90.74 |
90.74 |
87.96 |
87.96 |
91.67 |
91.67 |
| Armenia |
|
|
|
|
|
|
|
|
|
|
# Correlation matrix of the remaining columns. The identifier and date columns
# are dropped by name and columns 43 to 71 by position (presumably the
# stringency-index columns; confirm against the column order). Rows with any
# missing value are removed before computing the correlations.
cormat <- COVID_BGC %>%
  select(-c(
    country_name, alpha_3_code, date_first_death, date_fifth_death, 43:71
  )) %>%
  na.omit() %>%
  cor()
# Rounded copy with the upper triangle blanked out, so that every correlation
# is displayed only once, reshaped to long format for ggplot.
cormat2 <- round(cormat, 2)
cormat2[upper.tri(cormat2)] <- NA
cormat2 <- melt(cormat2)
ggplot(data = cormat2, mapping = aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_continuous(type = "viridis")

# Interactive heatmap of the full (unrounded, unmasked) correlation matrix.
fig <- plot_ly(
  z = cormat,
  x = colnames(cormat),
  y = colnames(cormat),
  type = "heatmap"
)
fig
# Scatter plot of dpm_50_d against the strain2005-2010 indicator, labelled by
# country. Points and labels use a jitter with the same seed so that each
# label is placed relative to its own jittered point.
ggplot(
  data = COVID_BGC,
  mapping = aes(dpm_50_d, `strain2005-2010`, label = country_name)
) +
  geom_jitter(position = position_jitter(seed = 1)) +
  geom_label_repel(position = position_jitter(seed = 1), size = 2) +
  xlim(-100, 800)
